################################################################################################
#######  Volha Charnysh. 2024. Uprooted: How post-WWII Population Transfers Remade Europe. CUP
#######  Chapter 8. Replication for county-level analysis  #####
#######  R version 4.3.1 (2023-06-16) -- "Beagle Scouts".  #####
#######  Platform: aarch64-apple-darwin20 (64-bit)         #####    
################################################################

rm(list = ls())
library(openxlsx)
library(readstata13)
library(stargazer)
library(sandwich)
library(lmtest)
library(stringr)
library(arm) # for standardize
library(knitr) #for knitting
library(ggplot2) #for plotting
library(texreg)
library(xtable)
library(conleyreg)

##############################################################
####### CREATE FUNCTIONS FOR CROSS-SECTIONAL ANALYSIS ########
##############################################################

#function to create formulas
formulas <- function(a, b) {
  formulas <- list()
  n <- 0
  for (i in a) {
    for (j in b) {
      n <- n + 1
      formulas[n] <- paste(i, j)
    }
  }
  return(formulas)
}

#function to run models
run_models <- function(formlist) {
  models <- list()
  n <- 0
  for (i in 1:length(formlist)){
    n <- n + 1
    models[[n]] <- eval(parse(text=formlist[[n]])) 
  }
  return(models)
}

#vcov function 
hc1<-function(x) vcovHC(x, type="HC1")  

#function to extract coefficients + calculate errors
run_coef <- function(output) {
  stdev <- list() 
  n <- 0
  for (i in 1:length(output)){
    n <- n + 1
    stdev[[n]] <- coeftest(output[[n]], vcov. = hc1)
  }
  return(stdev)    
}

###################
##### Read in datasets 

data_1987m<-read.csv("chapter8/data_1950_1987fin.csv")

head(data_1987m) #ID=KREIS87

distance<-read.csv("chapter8/distEastBorder.csv") #This file has latitude and longitude.

data_1987m2<-merge(data_1987m, distance[, c(2:5,6)], by="KREIS87")


##Compute self-employed to population as a proxy for entrepreneurship: 
data_1987m2$Selbst50pt<-data_1987m2$vza021/(data_1987m2$vza003/1000) 
data_1987m2$Selbst61pt<-data_1987m2$vzb073/(data_1987m2$vzb001/1000)
data_1987m2$Selbst70pt<-data_1987m2$vzc107/(data_1987m2$vzc001/1000)
data_1987m2$Selbst87pt<-data_1987m2$vz87i57k/(data_1987m2$vz87i07k/1000)

#Education levels
data_1987m2$ShareHigherEdu1970<-100*(data_1987m2$vzc081/(data_1987m2$vzc001-data_1987m2$vzc214-data_1987m2$vzc215)) #divided by population aged 15 and older

data_1987m2$ShareHigherEdu1987<-100*(data_1987m2$vz87i72k/(data_1987m2$vz87i07k-data_1987m2$vz87i08k-data_1987m2$vz87i09k-data_1987m2$vz87i10k-
                                                             data_1987m2$vz87i11k-data_1987m2$vz87i12k)) #divided by population aged 15 and older

#Agricultural employment measurement based on data availability:

data_1987m2$Agr1950<-data_1987m2$vza025/data_1987m2$vza019 # vza025 is Erwerbspersonen Land- und Forstwirtschaft
data_1987m2$Agr1950<-data_1987m2$vza025/data_1987m2$vza019 # vza025 is Erwerbspersonen Land- und Forstwirtschaft
data_1987m2$Agr1961<-data_1987m2$vzb082/data_1987m2$vzb079 #Erwerbspersonen WO Land- und Forstwirtschaft

data_1987m2$AgrErwrbst1970<-data_1987m2$vzc190/data_1987m2$vzc105 #Erwerbstätige Land-, Forst- und Tierwirtschaft
data_1987m2$AgrErwrbst1987w<-data_1987m2$vz87i41k/data_1987m2$vz87i40k #from Erwerbstätige WO (wohnungsort) #using this variable


#Manufacturing jobs (Verarbeitendes Gewerbe), doing beschaftigte bc fewer NAs 
data_1987m2$ManufErwerbspers1950<-data_1987m2$vza026/data_1987m2$vza019
data_1987m2$ManufBesch1961<-data_1987m2$vzb112/data_1987m2$vzb108 #from Erwerbstätige (excludes unemployed) 
data_1987m2$ManufBesch1970<-data_1987m2$vzc146/data_1987m2$vzc142 #from Erwerbstätige - a lot of NAs 
data_1987m2$ManufBesch1987<-data_1987m2$az87i24k/data_1987m2$besch87 #from Erwerbstätige. vz87i47k



#################################################################################################
########## Explanatory variables: expellees and heterogeneity of expellee population  ###########
#################################################################################################

data_1987m2$ShareExpellees1950<-data_1987m2$Heimatvertriebene1950/data_1987m2$vza003
data_1987m2$ShareExpellees1946<-data_1987m2$Expellees1946/data_1987m2$Bevolkerung1946
data_1987m2$ShareReichDeutsche1950<-data_1987m2$FromOstgebieten1950/data_1987m2$vza003
data_1987m2$ShareVolksDeutsche1950<-(data_1987m2$FromCzechSl1950+data_1987m2$FromOther1950)/data_1987m2$vza003
data_1987m2$ShareSudeten1950<-data_1987m2$FromCzechSl1950/data_1987m2$vza003
data_1987m2$ShareOther1950<-data_1987m2$FromOther1950/data_1987m2$vza003

data_1987m2[which(is.na(data_1987m2$ShareExpellees1946)), 1:9]
data_1987m2[which(is.na(data_1987m2$ShareExpellees1950)), 1:9]
data_1987m2$FrenchZone[which(is.na(data_1987m2$ShareExpellees1946))]  #one zero? 

data_1987m2$ShareRefugees1961<-data_1987m2$vzb042/data_1987m2$vzb001 
data_1987m2$ShareOstExpelleesOnly1961<-data_1987m2$vzb043/data_1987m2$vzb001 

data_1987m2$largestGr<-ifelse(data_1987m2$ShareReichDeutsche1950>data_1987m2$ShareSudeten1950 & data_1987m2$ShareReichDeutsche1950>data_1987m2$ShareOther1950, "Reichsdeu", 
                              ifelse(data_1987m2$ShareSudeten1950>data_1987m2$ShareReichDeutsche1950 & data_1987m2$ShareSudeten1950>data_1987m2$ShareOther1950, "Sudetendeu", "Other"))

# Diversity of refugee population by origin: 
data_1987m2$DivOrMig1950<-1-((data_1987m2$FromOstgebieten1950/data_1987m2$Heimatvertriebene1950)^2+(data_1987m2$FromCzechSl1950/data_1987m2$Heimatvertriebene1950)^2+(data_1987m2$FromOther1950/data_1987m2$Heimatvertriebene1950)^2)
summary(data_1987m2$DivOrMig1950) 
sd(data_1987m2$DivOrMig1950, na.rm=TRUE) 



###############################
##### control variables:  #####
###############################

data_1987m2$ShareDestroyed<-data_1987m2$KriegsschaedenNWohnungen/data_1987m2$NormalWohnungen 
data_1987m2$ShareMen1950<-data_1987m2$vza004/data_1987m2$vza003  #varies between 42% and 50%

#Pre-war population size
data_1987m2$LnPop1939<-log(data_1987m2$vza001)
data_1987m2$ShareCatholic1939<-data_1987m2$Katholische1939/data_1987m2$Population1939
data_1987m2$ShareAgric1939<-data_1987m2$AgricultureInsgesamt1939/data_1987m2$Erwerbspersonen_Total1939 
data_1987m2$bevd39 #pre-war population density

#Religious fractionalization in 1939: 
data_1987m2$ShareProt1939<-data_1987m2$Evangelische1939/data_1987m2$Population1939
data_1987m2$ShareOther1939<-(data_1987m2$Population1939-data_1987m2$Evangelische1939-data_1987m2$Katholische1939)/data_1987m2$Population1939
data_1987m2$ReligFrac1939<-1-(data_1987m2$ShareCatholic1939^2+data_1987m2$ShareProt1939^2+data_1987m2$ShareOther1939^2)


data_1987m2$lnPop1939<-log(data_1987m2$vza001)
data_1987m2$lnPop1950<-log(data_1987m2$vza003)
data_1987m2$Aged20.60<-(data_1987m2$vza008+data_1987m2$vza009)/data_1987m2$vza003 #above one? 
data_1987m2$lnDistEastBorder<-log(data_1987m2$DistEastBorder)


data_1987m2$Zone<-ifelse(data_1987m2$land %in% c("Schleswig-Holztein",  "Niedersachsen", "Nordrhein-Westfalen", "Hamburg"), "British", ifelse(data_1987m2$land %in%  c("Bayern", "Hesen", "Bremen")   | data_1987m2$regbez=="RB Stuttgart", "American", ifelse(data_1987m2$land %in% c("Saarland", "Berlin (West)"), NA, "French")))
data_1987m2$Zone[data_1987m2$name %in% c("KS Karlsruhe", "Karlsruhe",  "Rhein-Neckar-Kreis", "Neckar-Odenwald-Kreis", "KS Pforzheim", "KS Mannheim")]<-"American"
table(data_1987m2$Zone)
data_1987m2$FrenchZone<-ifelse(data_1987m2$Zone=="French", 1, 0)
data_1987m2$FrenchSaarland<-ifelse(data_1987m2$Zone=="French" | is.na(data_1987m2$Zone), 1, 0)

data_1987m2$name[is.na(data_1987m2$FrenchZone)] #Saarland has NAs for French zone; it was rejoined the Republic of Germany in 1957. 

################################################
#####  Table 8.1: Correlations         #########
################################################

corrs<-rbind(cbind(cor.test(data_1987m2$DivOrMig1950, data_1987m2$ShareExpellees1950)$estimate, summary(lm(DivOrMig1950~ ShareExpellees1950, data=data_1987m2))$r.squared,
                   cor.test(data_1987m2$ShareExpellees1950, data_1987m2$ShareExpellees1950)$estimate, summary(lm(ShareExpellees1950~ ShareExpellees1950, data=data_1987m2))$r.squared),
            cbind(cor.test(data_1987m2$DivOrMig1950, data_1987m2$ShareDestroyed)$estimate, summary(lm(DivOrMig1950~ ShareDestroyed, data=data_1987m2))$r.squared,
                   cor.test(data_1987m2$ShareExpellees1950, data_1987m2$ShareDestroyed)$estimate, summary(lm(ShareExpellees1950~ ShareDestroyed, data=data_1987m2))$r.squared),
             cbind(cor.test(data_1987m2$DivOrMig1950, data_1987m2$ShareCatholic1939)$estimate, summary(lm(DivOrMig1950~ ShareCatholic1939, data=data_1987m2))$r.squared,
                   cor.test(data_1987m2$ShareExpellees1950, data_1987m2$ShareCatholic1939)$estimate, summary(lm(ShareExpellees1950~ ShareCatholic1939, data=data_1987m2))$r.squared),
             cbind(cor.test(data_1987m2$DivOrMig1950, data_1987m2$ShareAgric1939)$estimate, summary(lm(DivOrMig1950~ ShareAgric1939, data=data_1987m2))$r.squared,
                   cor.test(data_1987m2$ShareExpellees1950, data_1987m2$ShareAgric1939)$estimate, summary(lm(ShareExpellees1950~ ShareAgric1939, data=data_1987m2))$r.squared),
             cbind(cor.test(data_1987m2$DivOrMig1950, data_1987m2$lnDistEastBorder)$estimate, summary(lm(DivOrMig1950~ lnDistEastBorder, data=data_1987m2))$r.squared,
                   cor.test(data_1987m2$ShareExpellees1950, data_1987m2$lnDistEastBorder)$estimate, summary(lm(ShareExpellees1950~ lnDistEastBorder, data=data_1987m2))$r.squared),
             cbind(cor.test(data_1987m2$DivOrMig1950, data_1987m2$bevd39)$estimate, summary(lm(DivOrMig1950~ bevd39, data=data_1987m2))$r.squared,
                   cor.test(data_1987m2$ShareExpellees1950, data_1987m2$bevd39)$estimate, summary(lm(ShareExpellees1950~ bevd39, data=data_1987m2))$r.squared),
             cbind(cor.test(data_1987m2$DivOrMig1950, data_1987m2$lnPop1950)$estimate, summary(lm(DivOrMig1950~ lnPop1950, data=data_1987m2))$r.squared,
                   cor.test(data_1987m2$ShareExpellees1950, data_1987m2$lnPop1950)$estimate, summary(lm(ShareExpellees1950~ lnPop1950, data=data_1987m2))$r.squared))

#Warning is bcof  lm(ShareExpellees1950~ ShareExpellees1950, data=data_1987m2) in row 1. Ignore. 

corrs2<-as.data.frame(corrs, row.names=c("Share expellees (1950)", "Share Destroyed (1946)", "Share Catholic (1939)","Share in Agriculture (1939)", "Ln Distance to Eastern border", "Population density (1939)", "Ln Population (1950)"))

names(corrs2)<-c("Pearson Correlation", "Propotion of explained variance (R^2)", "Pearson Correlation", "Propotion of explained variance (R^2)")

xtable(corrs2, digits=3)
             

#Read in data on contemporary economic outcomes: 
enterpr<-read.xlsx("chapter8/Enterprises_AllKreise2006.xlsx")


data_1987m2$ID<-as.numeric(gsub('.{3}$', '', data_1987m2$KREIS87)) # remove last 3 zeros for matching
enterpr$ID<-as.numeric(enterpr$ID)
data_1987m2$ID %in% enterpr$ID 

enterpr$ID[enterpr$Kreis=="      Aachen, krfr. Stadt"]<-5313 #05334002 original
enterpr$ID[enterpr$Kreis=="  Hamburg"]<-2000 


dat<-merge(data_1987m2, enterpr,  by="ID", all.x=TRUE)

dim(dat) #N=327 

##ADD NEW VARIABLES:
add<-read.xlsx("chapter8/HouseholdInc.xlsx")
add$Kennziffer<-as.numeric(add$Kennziffer)
#The Sösestadt Osterode am Harz was the district town of the Osterode am Harz district, which merged with the Göttingen district on November 1, 2016.
#Becoming effective on 21 October 2009, the Städteregion Aachen (literally: "cities region" Aachen) was formed from the former district Aachen (Kreis Aachen) and the city of Aachen.
dat[which(!dat$ID %in%  add$Kennziffer), c(1:2,75)] 

dat<-merge(dat, add, by.x="ID", by.y="Kennziffer", all.x=TRUE) #N=327 (but w/ NAs its less)

#calculate outcome vars
dat$Pop2006<-as.numeric(dat$Pop2006) 
dat$Anzahl2006<-as.numeric(dat$Anzahl2006)
dat$Handel2006<-as.numeric(dat$Handel2006)
dat$Nieder10_49<-as.numeric(dat$Nieder10_49)
dat$NiederlUnder10<-as.numeric(dat$NiederlUnder10)
dat$Nieder50_249<-as.numeric(dat$Nieder50_249)


dat$EnterpPer1000<-dat$Anzahl2006/(dat$Pop2006/1000) 
dat$EnterpUnder250Per1000<-(dat$NiederlUnder10+dat$Nieder10_49+dat$Nieder50_249)/(dat$Pop2006/1000)

summary(dat$EnterpPer1000)
sd(dat$EnterpPer1000, na.rm=TRUE)
summary(dat$EnterpHandelBauVerarbPer1000)
summary(dat$EnterpUnder50Per1000)


vars<-c( "Inform2006", "Finanz2006", "Grundst2006","Freiberuf2006", "SonstWirtschDienst2006","Erziehung2006","Erbringung2006"  )
dat[vars] <- sapply(dat[vars], as.numeric)
dat$Inform2006[is.na(dat$Inform2006)]<-0
dat$Grundst2006[is.na(dat$Grundst2006)]<-0
dat$Freiberuf2006[is.na(dat$Freiberuf2006)]<-0
dat$SonstWirtschDienst2006[is.na(dat$SonstWirtschDienst2006)]<-0
dat$Erbringung2006[is.na(dat$Erbringung2006)]<-0
dat$Erziehung2006[is.na(dat$Erziehung2006)]<-0


dat$Stadtkreis <- ifelse(str_detect(dat$Kreis, "Stadt"), 1,0)  

dat$InfPer1000<-(dat$Inform2006)/(dat$Pop2006/1000)  #Information und Kommunikation (J)
dat$FreiPer1000<-(dat$Freiberuf2006)/(dat$Pop2006/1000)  #Freiberufliche, wiss.u.techn. Dienstleistungen (M)
dat$SonstPer1000<-(dat$SonstWirtschDienst2006)/(dat$Pop2006/1000)  #
dat$EduPer1000<-(dat$Erziehung2006)/(dat$Pop2006/1000)  #
dat$LnHaushaltseink2000<-log(dat$Haushaltseink2000) # in euro per einwohner


#################################################################################
##### Table 8.2: Economic outcomes in West German counties 1950-87 ##############
#################################################################################

selfempl_edu_DVs<-list("lm(Selbst50pt~", "lm(Selbst61pt~", "lm(Selbst70pt~", "lm(Selbst87pt~", "lm(ShareHigherEdu1970~", "lm(ShareHigherEdu1987~") 
expvarsV1 <- list("ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat)")

#create and run formulas + get coefficients
formula.selfemp.edu <- formulas(selfempl_edu_DVs, expvarsV1)
mod.selfemp.edu <- run_models(formula.selfemp.edu) #8 models now, first set without fractionalization
mod.selfemp.edu.cl <- run_coef(mod.selfemp.edu)


texreg(
  l = list(mod.selfemp.edu[[1]], mod.selfemp.edu[[2]], mod.selfemp.edu[[3]],  mod.selfemp.edu[[4]], mod.selfemp.edu[[5]], mod.selfemp.edu[[6]]),
  override.se=list(mod.selfemp.edu.cl[[1]][,2], mod.selfemp.edu.cl[[2]][,2],  mod.selfemp.edu.cl[[3]][,2], mod.selfemp.edu.cl[[4]][,2], mod.selfemp.edu.cl[[5]][,2], mod.selfemp.edu.cl[[6]][,2]),
  override.pval=list(mod.selfemp.edu.cl[[1]][,4], mod.selfemp.edu.cl[[2]][,4],  mod.selfemp.edu.cl[[3]][,4], mod.selfemp.edu.cl[[4]][,4], mod.selfemp.edu.cl[[5]][,4], mod.selfemp.edu.cl[[6]][,4]),
  custom.coef.names = c("Share expellees", "Expellee diversity", "Share destroyed", "Share Catholic", "Share in agriculture", "Ln dist to the Eastern border","Pop density 1939", "Ln population 1950", "City-district", "French zone dummy"),  dcolumn = TRUE,
  custom.model.names = c("Self-Empl 1950", "Self-Empl 1961", "Self-Empl 1970", "Self-Empl 1987", "Edu 1970", "Edu 1987"),
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  omit.coef = "land|Intercept"
)

################################################ 
##### Conley errors       #####################
################################################

m1<-conleyreg(Selbst50pt ~ ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data = dat, 500, lat = "lat", lon = "lon")
m2<-conleyreg(Selbst61pt ~ ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data = dat, 500, lat = "lat", lon = "lon")
m3<-conleyreg(Selbst70pt ~ ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat, 500, lat = "lat", lon = "lon")
m4<-conleyreg(Selbst87pt ~ ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat, 500, lat = "lat", lon = "lon")
m5<-conleyreg(ShareHigherEdu1970 ~ ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat, 500, lat = "lat", lon = "lon")
m6<-conleyreg(ShareHigherEdu1987 ~ ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat, 500, lat = "lat", lon = "lon")

texreg(
  l = list(m1,m2,m3,m4, m5, m6),
  override.se=list(m1[,2], m2[,2],  m3[,2], m4[,2],m5[,2],m6[,2]),
  override.pval=list(m1[,4], m2[,4], m3[,4], m4[,4], m5[,4], m6[,4]),
  custom.coef.names = c("Share expellees", "Expellee diversity", "Share destroyed", "Share Catholic", "Share in agriculture", "Ln Dist to the Eastern border","Pop density 1939", "ln Population 1950", "City-district", "French zone dummy"), 
  custom.model.names = c("Self-Empl 1950", "Self-Empl 1961", "Self-Empl 1970", "Self-Empl 1987", "Edu 1970", "Edu 1987"),
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  omit.coef = "land|Intercept"
)

###############################################################################
##### Figure 8.3: Plotting coefficients w/ heteroskedasticity-robust ses ######
############################################################################### 

cse = function(reg) {
  rob = sqrt(diag(vcovHC(reg, type = "HC1")))
  return(rob)
}
hc1<-function(x) vcovHC(x, type="HC1")  


self.edu1s<-standardize(mod.selfemp.edu[[1]], unchanged="land", standardize.y=TRUE)
self.edu2s<-standardize(mod.selfemp.edu[[2]], unchanged="land", standardize.y=TRUE)
self.edu3s<-standardize(mod.selfemp.edu[[3]], unchanged="land", standardize.y=TRUE)
self.edu4s<-standardize(mod.selfemp.edu[[4]], unchanged="land", standardize.y=TRUE)
self.edu5s<-standardize(mod.selfemp.edu[[5]], unchanged="land", standardize.y=TRUE)
self.edu6s<-standardize(mod.selfemp.edu[[6]], unchanged="land", standardize.y=TRUE)

#### Let's plot the coefficients on "ShareExpellees1950" and "DivOrMig1950"

plot.outRefugees <- sapply(list(self.edu1s, self.edu2s,  self.edu3s, self.edu4s, self.edu5s, self.edu6s), function(x){
  coeftest(x, vcov.=hc1)[2,1:2] #Check this number
})

#Additional models: relief onset and termination separately: reg4a, reg5a,
plot.outDiversity <- sapply(list(self.edu1s, self.edu2s,  self.edu3s, self.edu4s, self.edu5s, self.edu6s), function(x){ 
  coeftest(x, vcov.=hc1)[3,1:2] 
})

plot.outRefugees <- as.data.frame(t(plot.outRefugees))
plot.outDiversity <- as.data.frame(t(plot.outDiversity))

plot.outAllS <- rbind(plot.outRefugees, plot.outDiversity)

plot.outAllS$dv <- factor(rep(c("Self-empl 1950", "Self-empl 1961", "Self-empl 1970", "Self-empl 1987", "Higher edu in 1970", "Higher edu in 1987"), 2), 
                          levels=c("Self-empl 1950", "Self-empl 1961", "Self-empl 1970", "Self-empl 1987", "Higher edu in 1970", "Higher edu in 1987")) 
plot.outAllS$iv <- c(rep("Share expellees",6), rep("Diversity of expellees", 6))
plot.outAllS$lb <- plot.outAllS$Estimate - 1.96*plot.outAllS$`Std. Error`
plot.outAllS$ub <- plot.outAllS$Estimate + 1.96*plot.outAllS$`Std. Error`

figure8.3 = ggplot(
  data = plot.outAllS,
  mapping = aes(
    x = dv,
    y = Estimate,
    shape = iv
  )
) +
  geom_hline(yintercept = 0, colour = "grey") +
  geom_pointrange(mapping = aes(ymin = lb, ymax = ub), alpha = 0.75, position = position_dodge(width = 0.1), size=1) +
  scale_y_continuous(name = "Average marginal effect") +scale_shape_manual(values = c(2, 16))+
  scale_x_discrete(name = NULL, labels = c("Self-employed 1950", "Self-employed 1961", "Self-employed 1970", "Self-employed 1987", "Higher edu 1970", "Higher edu 1987")) +
  theme_bw() +
  theme(
    text=element_text(size=12, family="sans", color="black"),
    legend.position = "bottom",
    legend.text = element_text(size = 12,family="sans"),
    axis.text.x = element_text(angle = 47, hjust = 1, size=12, family="sans", color="black"),
    legend.title = element_blank(),
    plot.margin = unit(c(.5,.5,.5,1), "cm")
  )

ggsave("Self_empl_Edu.jpg", figure8.3, width = 8, height = 6)


########################################################
##### Table A.21 AGRICULTURAL EMPLOYMENT  ##############
########################################################

agric_DVs<-list("lm(Agr1950~", "lm(Agr1961~", "lm(AgrErwrbst1970~", "lm(AgrErwrbst1987w~") 
expvarsV1 <- list("ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat)")

#create and run formulas + get coefficients
formula.agric <- formulas(agric_DVs, expvarsV1)
mod.agric <- run_models(formula.agric) 
mod.agric.cl <- run_coef(mod.agric)


### STARGAZER TABLES: 
stargazer(mod.agric[[1]], mod.agric[[2]], mod.agric[[3]], mod.agric[[4]], 
          se = list(mod.agric.cl[[1]][,2], mod.agric.cl[[2]][,2], mod.agric.cl[[3]][,2], mod.agric.cl[[4]][,2]),
          p=list(mod.agric.cl[[1]][,4], mod.agric.cl[[2]][,4], mod.agric.cl[[3]][,4], mod.agric.cl[[4]][,4]),
          digits=2, 
          covariate.labels=c("Share expellees", "Expellee diversity",  "Share destroyed", "Share Catholic", "Share in agriculture", "Ln dist to the Eastern border","Pop density 1939", "Ln population 1950", "City-district", "French zone dummy"), 
          omit=c("land", "Constant"), omit.stat=c("ser","f", "rsq"), no.space=TRUE, column.labels = c("Agriculture 1950", "Agriculture 1961", "Agriculture 1970", "Agriculture 1987"), 
          title="Agricultural workforce (share). Robust SEs in parentheses.") #


## FIGURE 8.4: Share and diversity of expellees and agricultural employment

ag1s<-standardize(mod.agric[[1]], unchanged="land", standardize.y=TRUE)
ag2s<-standardize(mod.agric[[2]], unchanged="land", standardize.y=TRUE)
ag3s<-standardize(mod.agric[[3]], unchanged="land", standardize.y=TRUE)
ag4s<-standardize(mod.agric[[4]], unchanged="land", standardize.y=TRUE)

#### Plot the coefficients on "ShareExpellees1950" and "DivOrMig1950"

plot.outRefugees <- sapply(list(ag1s, ag2s,  ag3s, ag4s), function(x){
  coeftest(x, vcov.=hc1)[2,1:2] #Check this number
})

#Additional models: relief onset and termination separately:
plot.outDiversity <- sapply(list(ag1s, ag2s,  ag3s, ag4s), function(x){ 
  coeftest(x, vcov.=hc1)[3,1:2] 
})

plot.outRefugees <- as.data.frame(t(plot.outRefugees))
plot.outDiversity <- as.data.frame(t(plot.outDiversity))

plot.outAllS <- rbind(plot.outRefugees, plot.outDiversity)

plot.outAllS$dv <- factor(rep(c("Agriculture 1950", "Agriculture 1961", "Agriculture 1970", "Agriculture 1987"), 2), 
                          levels=c("Agriculture 1950", "Agriculture 1961", "Agriculture 1970", "Agriculture 1987")) 
plot.outAllS$iv <- c(rep("Share expellees",4), rep("Diversity of expellees", 4))
plot.outAllS$lb <- plot.outAllS$Estimate - 1.96*plot.outAllS$`Std. Error`
plot.outAllS$ub <- plot.outAllS$Estimate + 1.96*plot.outAllS$`Std. Error`

figure8.4 = ggplot(
  data = plot.outAllS,
  mapping = aes(
    x = dv,
    y = Estimate,
    shape = iv
  )
) +
  geom_hline(yintercept = 0, colour = "grey") +
  geom_pointrange(mapping = aes(ymin = lb, ymax = ub), alpha = 0.75, position = position_dodge(width = 0.1), size=1) +
  scale_y_continuous(name = "Average partial effect") +scale_shape_manual(values = c(2, 16))+
  scale_x_discrete(name = NULL, labels = c("Agriculture 1950", "Agriculture 1961", "Agriculture 1970", "Agriculture 1987")) +
  theme_bw() +
  theme(
    text=element_text(size=12, family="sans", color="black"),
    legend.position = "bottom",
    legend.text = element_text(size = 12,family="sans"),
    axis.text.x = element_text(angle = 47, hjust = 1, size=12, family="sans", color="black"),
    legend.title = element_blank(),
    plot.margin = unit(c(.5,.5,.5,1), "cm")
  )

ggsave("Coefs_Agriculture.jpg",figure8.4, width = 8, height = 6)


## Conley errors for table A.21:

m1<-conleyreg(Agr1950 ~ ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data = dat, 500, lat = "lat", lon = "lon")
m2<-conleyreg(Agr1961 ~ ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data = dat, 500, lat = "lat", lon = "lon")
m3<-conleyreg(AgrErwrbst1970 ~ ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat, 500, lat = "lat", lon = "lon")
m4<-conleyreg(AgrErwrbst1987w ~ ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat, 500, lat = "lat", lon = "lon")

texreg(
  l = list(m1,m2,m3,m4),
  override.se=list(m1[,2], m2[,2],  m3[,2], m4[,2]),
  override.pval=list(m1[,4], m2[,4], m3[,4], m4[,4]),
  custom.coef.names = c("Share expellees", "Expellee diversity", "Share destroyed", "Share Catholic", "Share in agriculture", "Ln Dist to the Eastern border","Pop density 1939", "ln Population 1950", "City-district", "French zone dummy"), 
  custom.model.names = c("Agric 1950", "Agric 1961", "Agric 1970", "Agric 1987"),
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  omit.coef = "land|Intercept"
)



#####################################################################################
######### Table A.22. Entrepreneurship, household income in the 2000s  ##############
#####################################################################################


contem_DVs<-list("lm(EnterpPer1000~", "lm(FreiPer1000~", "lm(InfPer1000~", "lm(SonstPer1000~", "lm(EduPer1000~", "lm(LnHaushaltseink2000~") 
expvarsV1 <- list("ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat)")

#create and run formulas + get coefficients
formula.contemp <- formulas(contem_DVs, expvarsV1)
mod.contemp <- run_models(formula.contemp) #8 models now, first set without fractionalization
mod.contemp.cl <- run_coef(mod.contemp)


texreg(
  l = list(mod.contemp[[1]], mod.contemp[[2]], mod.contemp[[3]], mod.contemp[[4]], mod.contemp[[5]], mod.contemp[[6]]),
  override.se=list(mod.contemp.cl[[1]][,2], mod.contemp.cl[[2]][,2], mod.contemp.cl[[3]][,2], mod.contemp.cl[[4]][,2], mod.contemp.cl[[5]][,2], mod.contemp.cl[[6]][,2]),
  override.pval=list(mod.contemp.cl[[1]][,4], mod.contemp.cl[[2]][,4], mod.contemp.cl[[3]][,4], mod.contemp.cl[[4]][,4], mod.contemp.cl[[5]][,4], mod.contemp.cl[[6]][,4]),
  custom.coef.names = c("Share expellees", "Expellee diversity", "Share destroyed", "Share Catholic", "Share in agriculture", "Ln Dist to the Eastern border","Pop density 1939", "ln Population 1950", "City-district", "French zone dummy"), 
  custom.model.names = c("All companies",  "Professional, science, tech", "Information, communication", "Administrative services", "Education", "Ln(Household income in 2000)"), 
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  omit.coef = "land|Intercept"
)

##########################################
######### Add Conley errors to table A.22
##########################################
m1<-conleyreg(EnterpPer1000 ~ ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data = dat, 500, lat = "lat", lon = "lon")
m2<-conleyreg(FreiPer1000 ~ ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data = dat, 500, lat = "lat", lon = "lon")
m3<-conleyreg(InfPer1000 ~ ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat, 500, lat = "lat", lon = "lon")
m4<-conleyreg(SonstPer1000 ~ ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat, 500, lat = "lat", lon = "lon")
m5<-conleyreg(EduPer1000 ~ ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat, 500, lat = "lat", lon = "lon")
m6<-conleyreg(LnHaushaltseink2000 ~ ShareExpellees1950+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat, 500, lat = "lat", lon = "lon")

texreg(
  l = list(m1,m2,m3,m4,m5,m6),
  override.se=list(m1[,2], m2[,2],  m3[,2], m4[,2], m5[,2], m6[,2]),
  override.pval=list(m1[,4], m2[,4], m3[,4], m4[,4], m5[,4], m6[,4]),
  custom.coef.names = c("Share expellees", "Expellee diversity", "Share destroyed", "Share Catholic", "Share in agriculture", "Ln Dist to the Eastern border","Pop density 1939", "ln Population 1950", "City-district", "French zone dummy"), 
  custom.model.names = c("All companies",  "Professional, science, tech", "Information, communication", "Administrative services", "Education", "Ln(Household income in 2000)"), 
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  omit.coef = "land|Intercept"
)


### FIGURE 8.5 BASED ON REGULAR ERRORS (which are more conservative):

cont1s<-standardize(mod.contemp[[1]], unchanged="land", standardize.y=TRUE)
cont2s<-standardize(mod.contemp[[2]], unchanged="land", standardize.y=TRUE)
cont3s<-standardize(mod.contemp[[3]], unchanged="land", standardize.y=TRUE)
cont4s<-standardize(mod.contemp[[4]], unchanged="land", standardize.y=TRUE)
cont5s<-standardize(mod.contemp[[5]], unchanged="land", standardize.y=TRUE)
cont6s<-standardize(mod.contemp[[6]], unchanged="land", standardize.y=TRUE)

#### Let's plot the coefficients on "ShareExpellees1950" and "DivOrMig1950"

plot.outRefugees <- sapply(list(cont1s, cont2s,  cont3s, cont4s, cont5s, cont6s), function(x){
  coeftest(x, vcov.=hc1)[2,1:2] #Check this number
})


plot.outDiversity <- sapply(list(cont1s, cont2s,  cont3s, cont4s, cont5s, cont6s), function(x){ 
  coeftest(x, vcov.=hc1)[3,1:2] 
})

plot.outRefugees <- as.data.frame(t(plot.outRefugees))
plot.outDiversity <- as.data.frame(t(plot.outDiversity))

plot.outAllS <- rbind(plot.outRefugees, plot.outDiversity)

plot.outAllS$dv <- factor(rep(c("All companies (2008)",  "Professional, science and tech", "Information and communication", "Administrative services", "Education", "Ln household income in (2000)"), 2), 
                          levels=c("All companies (2008)",  "Professional, science and tech", "Information and communication", "Administrative services", "Education", "Ln household income in (2000)")) 
plot.outAllS$iv <- c(rep("Share expellees",6), rep("Diversity of expellees", 6))
plot.outAllS$lb <- plot.outAllS$Estimate - 1.96*plot.outAllS$`Std. Error`
plot.outAllS$ub <- plot.outAllS$Estimate + 1.96*plot.outAllS$`Std. Error`

figure8.5 = ggplot(
  data = plot.outAllS,
  mapping = aes(
    x = dv,
    y = Estimate,
    shape = iv
  )
) +
  geom_hline(yintercept = 0, colour = "grey") +
  geom_pointrange(mapping = aes(ymin = lb, ymax = ub), alpha = 0.75, position = position_dodge(width = 0.1), size=1) +
  scale_y_continuous(name = "Average marginal effect") +scale_shape_manual(values = c(2, 16))+
  scale_x_discrete(name = NULL, labels = c("All companies (2008)",  "Professional, science and tech", "Information and communication", "Administrative services", "Education", "Ln household income (2000)")) +
  theme_bw() +
  theme(
    text=element_text(size=12, family="sans", color="black"),
    legend.position = "bottom",
    legend.text = element_text(size = 12,family="sans"),
    axis.text.x = element_text(angle = 47, hjust = 1, size=12, family="sans", color="black"),
    legend.title = element_blank(),
    plot.margin = unit(c(.5,.5,.5,1), "cm")
  )

ggsave("Figure8.5.jpg",figure8.5, width = 8, height = 6)


#########################################################################
####### Table A.23: ROBUSTNESS CHECK, MIGRATION IN LATER PERIODS ########
#########################################################################


selfempl_edu_DVs<-list("lm(Selbst61pt~", "lm(Selbst70pt~", "lm(Selbst87pt~", "lm(ShareHigherEdu1970~", "lm(ShareHigherEdu1987~") 
expvarsV2 <- list("ShareOstExpelleesOnly1961+ DivOrMig1950 ++ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat)")

#create and run formulas + get coefficients
formula.selfemp.eduV2 <- formulas(selfempl_edu_DVs, expvarsV2)
mod.selfemp.eduV2 <- run_models(formula.selfemp.eduV2) 
mod.selfemp.eduV2.cl <- run_coef(mod.selfemp.eduV2)

stargazer(mod.selfemp.eduV2[[1]], mod.selfemp.eduV2[[2]], mod.selfemp.eduV2[[3]],  mod.selfemp.eduV2[[4]], mod.selfemp.eduV2[[5]],
          se = list(mod.selfemp.eduV2.cl[[1]][,2], mod.selfemp.eduV2.cl[[2]][,2],  mod.selfemp.eduV2.cl[[3]][,2], mod.selfemp.eduV2.cl[[4]][,2], mod.selfemp.eduV2.cl[[5]][,2]), 
          p=list(mod.selfemp.eduV2.cl[[1]][,4], mod.selfemp.eduV2.cl[[2]][,4],  mod.selfemp.eduV2.cl[[3]][,4], mod.selfemp.eduV2.cl[[4]][,4], mod.selfemp.eduV2.cl[[5]][,4]),
          digits=2, 
          covariate.labels=c("Share expellees","Expellee diversity", "Share destroyed", "Share Catholic", "Share in agriculture", "Ln dist to the Soviet Zone", "Pop density",  "Ln population", "Kreis type", "French zone dummy"), 
          omit=c("land", "Constant"), omit.stat=c("ser","f", "rsq"), no.space=TRUE, column.labels = c("Self-Empl 1961", "Self-Empl 1970", "Self-Empl 1987", "Edu 1970", "Edu 1987"), 
          title="Alternative specification with expellee share measured in 1961. Robust SEs in parentheses.") #

texreg(
  l = list(mod.selfemp.eduV2[[1]], mod.selfemp.eduV2[[2]], mod.selfemp.eduV2[[3]],  mod.selfemp.eduV2[[4]], mod.selfemp.eduV2[[5]]),
  override.se=list(mod.selfemp.eduV2.cl[[1]][,2], mod.selfemp.eduV2.cl[[2]][,2],  mod.selfemp.eduV2.cl[[3]][,2], mod.selfemp.eduV2.cl[[4]][,2], mod.selfemp.eduV2.cl[[5]][,2]), 
  override.pval=list(mod.selfemp.eduV2.cl[[1]][,4], mod.selfemp.eduV2.cl[[2]][,4],  mod.selfemp.eduV2.cl[[3]][,4], mod.selfemp.eduV2.cl[[4]][,4], mod.selfemp.eduV2.cl[[5]][,4]),
  custom.coef.names = c("Share expellees", "Expellee diversity", "Share destroyed", "Share Catholic", "Share in agriculture", "Ln Dist to the Eastern border","Pop density 1939", "ln Population 1950", "City-district", "French zone dummy"), 
  custom.model.names = c("Self-Empl 1961", "Self-Empl 1970", "Self-Empl 1987", "Edu 1970", "Edu 1987"),  
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  omit.coef = "land|Intercept"
)

## Conley errors: 
m1<-conleyreg(Selbst61pt ~ ShareOstExpelleesOnly1961+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data = dat, 500, lat = "lat", lon = "lon")
m2<-conleyreg(Selbst70pt ~ ShareOstExpelleesOnly1961+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data = dat, 500, lat = "lat", lon = "lon")
m3<-conleyreg(Selbst87pt ~ ShareOstExpelleesOnly1961+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat, 500, lat = "lat", lon = "lon")
m4<-conleyreg(ShareHigherEdu1970 ~ ShareOstExpelleesOnly1961+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat, 500, lat = "lat", lon = "lon")
m5<-conleyreg(ShareHigherEdu1987 ~ ShareOstExpelleesOnly1961+ DivOrMig1950 +ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat, 500, lat = "lat", lon = "lon")

texreg(
  l = list(m1,m2,m3,m4,m5),
  override.se=list(m1[,2], m2[,2],  m3[,2], m4[,2], m5[,2]),
  override.pval=list(m1[,4], m2[,4], m3[,4], m4[,4], m5[,4]),
  custom.coef.names = c("Share expellees", "Expellee diversity", "Share destroyed", "Share Catholic", "Share in agriculture", "Ln Dist to the Eastern border","Pop density 1939", "ln Population 1950", "City-district", "French zone dummy"), 
  custom.model.names = c("Self-Empl 1961", "Self-Empl 1970", "Self-Empl 1987", "Edu 1970", "Edu 1987"),  
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  omit.coef = "land|Intercept"
)


##########################################################################################
####### Table A.24: ROBUSTNESS CHECK, MIGRATION IN LATER PERIODS & 2000s outcomes ########
##########################################################################################

contem_DVs<-list("lm(EnterpPer1000~", "lm(FreiPer1000~", "lm(InfPer1000~", "lm(SonstPer1000~", "lm(EduPer1000~", "lm(LnHaushaltseink2000~") 
expvarsV2 <- list("ShareRefugees1961+ DivOrMig1950 ++ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat)")

#create and run formulas + get coefficients
formula.contemp <- formulas(contem_DVs, expvarsV2)
mod.contemp <- run_models(formula.contemp) #8 models now, first set without fractionalization
mod.contemp.cl <- run_coef(mod.contemp)


texreg(
  l = list(mod.contemp[[1]], mod.contemp[[2]], mod.contemp[[3]], mod.contemp[[4]], mod.contemp[[5]], mod.contemp[[6]]),
  override.se=list(mod.contemp.cl[[1]][,2], mod.contemp.cl[[2]][,2], mod.contemp.cl[[3]][,2], mod.contemp.cl[[4]][,2], mod.contemp.cl[[5]][,2], mod.contemp.cl[[6]][,2]),
  override.pval=list(mod.contemp.cl[[1]][,4], mod.contemp.cl[[2]][,4], mod.contemp.cl[[3]][,4], mod.contemp.cl[[4]][,4], mod.contemp.cl[[5]][,4], mod.contemp.cl[[6]][,4]),
  custom.coef.names = c("Share expellees 1961", "Expellee diversity", "Share destroyed", "Share Catholic", "Share in agriculture", "Ln Dist to the Eastern border","Pop density 1939", "ln Population 1950", "City-district", "French zone dummy"), 
  custom.model.names = c("All companies",  "Professional, science, tech", "Information, communication", "Administrative services", "Education", "Ln(Household income in 2000)"), 
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  omit.coef = "land|Intercept"
)


################################################################
####### Table A.25: Origins of the largest group; alt exp  #####
################################################################

ols1<-lm(Selbst50pt~ShareExpellees1950+DivOrMig1950+ largestGr+ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=data_1987m2)
ols2<-lm(Selbst61pt~ShareExpellees1950+DivOrMig1950+ largestGr+ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=data_1987m2)
ols3<-lm(Selbst70pt~ShareExpellees1950+DivOrMig1950+ largestGr+ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=data_1987m2)
ols4<-lm(Selbst87pt~ShareExpellees1950+DivOrMig1950+ largestGr+ ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=data_1987m2)

edu1<-lm(ShareHigherEdu1970~ShareExpellees1950+DivOrMig1950+largestGr + ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=data_1987m2)
edu2<-lm(ShareHigherEdu1987~ShareExpellees1950+DivOrMig1950+largestGr + ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=data_1987m2)


ses1 <- list(coeftest(ols1, vcov = vcovHC(ols1, type="HC1"))[,2], coeftest(ols2, vcov = vcovHC(ols2, type="HC1"))[,2], coeftest(ols3, vcov = vcovHC(ols3, type="HC1"))[,2], coeftest(ols4, vcov = vcovHC(ols4, type="HC1"))[,2], 
             coeftest(edu1, vcov = vcovHC(edu1, type="HC1"))[,2], coeftest(edu2, vcov = vcovHC(edu2, type="HC1"))[,2])

pvals1 <- list(coeftest(ols1, vcov = vcovHC(ols1, type="HC1"))[,4], coeftest(ols2, vcov = vcovHC(ols2, type="HC1"))[,4], 
               coeftest(ols3, vcov = vcovHC(ols3, type="HC1"))[,4], coeftest(ols4, vcov = vcovHC(ols4, type="HC1"))[,4], 
               coeftest(edu1, vcov = vcovHC(edu1, type="HC1"))[,4], coeftest(edu2, vcov = vcovHC(edu2, type="HC1"))[,4])


stargazer(ols1, ols2, ols3, ols4, edu1, edu2, se = ses1, p=pvals1, digits=2, 
          covariate.labels=c("Share expellees", "Expellee diversity (Origin)","Largest Group: Reichsdeutsche", "Largest Group: Sudetendeutsche", "Share Destroyed", "Share Catholic", "Share in Agriculture", "Ln Dist to the Eastern border","Pop Density 1939", "ln Population", "City-district", "French zone dummy "), 
          omit=c("land", "Constant"), omit.stat=c("ser","f", "rsq"), no.space=TRUE, column.labels = c("Self-Empl 1950", "Self-Empl 1961", "Self-Empl 1970", "Self-Empl 1987", "Edu 1970", "Edu 1987"), 
          title="Self-employmement and education level. Robust SEs in parentheses.") #



################################################################
####### Table A.26: Origins of the largest group; alt exp  #####
################################################################


ols5<-lm(EnterpPer1000~ShareExpellees1950+DivOrMig1950+ largestGr+ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat)
ols6<-lm(FreiPer1000~ShareExpellees1950+DivOrMig1950+ largestGr+ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat)
ols7<-lm(InfPer1000~ShareExpellees1950+DivOrMig1950+ largestGr+ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat)
ols8<-lm(SonstPer1000~ShareExpellees1950+DivOrMig1950+ largestGr+ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat)
ols9<-lm(EduPer1000~ShareExpellees1950+DivOrMig1950+ largestGr+ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat)
ols10<-lm(LnHaushaltseink2000~ShareExpellees1950+DivOrMig1950+ largestGr+ShareDestroyed+ShareCatholic1939+ShareAgric1939+lnDistEastBorder+bevd39+lnPop1950+ks+land+FrenchZone, data=dat)


ses1 <- list(coeftest(ols5, vcov = vcovHC(ols5, type="HC1"))[,2], coeftest(ols6, vcov = vcovHC(ols6, type="HC1"))[,2], coeftest(ols7, vcov = vcovHC(ols7, type="HC1"))[,2], 
             coeftest(ols8, vcov = vcovHC(ols8, type="HC1"))[,2], coeftest(ols9, vcov = vcovHC(ols9, type="HC1"))[,2], coeftest(ols10, vcov = vcovHC(ols10, type="HC1"))[,2])

pvals1 <- list(coeftest(ols5, vcov = vcovHC(ols5, type="HC1"))[,4], coeftest(ols6, vcov = vcovHC(ols6, type="HC1"))[,4], 
               coeftest(ols7, vcov = vcovHC(ols7, type="HC1"))[,4], coeftest(ols8, vcov = vcovHC(ols8, type="HC1"))[,4], 
               coeftest(ols9, vcov = vcovHC(ols9, type="HC1"))[,4], coeftest(ols10, vcov = vcovHC(ols10, type="HC1"))[,4])

### Table A.26 #
stargazer(ols5, ols6, ols7, ols8, ols9, ols10, se = ses1, p=pvals1, digits=2, 
          covariate.labels=c("Share expellees", "Expellee Diversity (Origin)","Largest Group: Reichsdeutsche", "Largest Group: Sudetendeutsche", "Share Destroyed", "Share Catholic", "Share in Agriculture", "Ln Dist to the Eastern border","Pop Density 1939", "ln Population", "City-district", "French zone dummy"), 
          omit=c("land", "Constant"), omit.stat=c("ser","f", "rsq"), no.space=TRUE, 
          column.labels = c("All companies 1950",  "Professional, science and tech", "Information & Communication", "Administrative Services", "Education", "Ln(Household income in 2000)"), 
          title="Contemporary economic indicators after controlling for the dominant group of refugees in a given district. Robust SEs in parentheses.")


